library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)

RRPLOTS and flchain

odata <- flchain
odata$chapter <- NULL
pander::pander(table(odata$death))
0 1
5705 2169
rownames(odata) <- c(1:nrow(odata))
data <- as.data.frame(model.matrix(Surv(futime,death)~.,odata))

data$`(Intercept)` <- NULL

dataFL <- as.data.frame(cbind(time=odata[rownames(data),"futime"],
                              status=odata[rownames(data),"death"],
                              data))
pander::pander(table(dataFL$status))
0 1
4562 1962
dataFL$time <- dataFL$time/365

Exploring Raw Features with RRPlot

convar <- colnames(dataFL)[lapply(apply(dataFL,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataFL[,c("status",convar)],"status")
pander::pander(topvar)
age kappa lambda creatinine
0 0 0 0
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]

topFeature <- RRPlot(cbind(dataFL$status,dataFL[,topFive[1]]),
                  title=topFive[1])

  par(op)

## With Survival Analysis
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(dataFL$status,dataFL[,topf]),
                  timetoEvent=dataFL$time,
                  atRate=c(0.90,0.80),
                  title=topf)
  idx <- idx + 1
  par(op)
}

names(RRanalysis) <- topFive

Reporting the Metrics

pander::pander(t(RRanalysis[[1]]$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100
Thr 73.000 69.000 69.000 54.000 5.00e+01
RR 4.013 4.399 4.430 5.672 1.00e+00
RR_LCI 3.740 4.045 4.077 4.529 0.00e+00
RR_UCI 4.305 4.783 4.814 7.104 0.00e+00
SEN 0.581 0.713 0.708 0.963 1.00e+00
SPE 0.883 0.790 0.798 0.241 8.77e-04
BACC 0.732 0.752 0.753 0.602 5.00e-01
ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive

pander::pander(ROCAUC)
  est lower upper
age 0.822 0.811 0.834
kappa 0.682 0.667 0.697
lambda 0.665 0.650 0.680
creatinine 0.591 0.575 0.607
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.775 0.775 0.764 0.784
kappa 0.671 0.671 0.658 0.684
lambda 0.657 0.657 0.645 0.670
creatinine 0.587 0.587 0.573 0.601
pander::pander(LogRangp)
age 0.00e+00
kappa 4.90e-175
lambda 4.41e-145
creatinine 2.67e-67
pander::pander(Sensitivity)
  est lower upper
age 0.581 0.558 0.602
kappa 0.319 0.298 0.340
lambda 0.300 0.279 0.321
creatinine 0.288 0.269 0.309
pander::pander(Specificity)
  est lower upper
age 0.883 0.873 0.892
kappa 0.900 0.891 0.908
lambda 0.899 0.890 0.907
creatinine 0.865 0.854 0.875
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe
age 0.822 0.775 0.581 0.883
kappa 0.682 0.671 0.319 0.900
lambda 0.665 0.657 0.300 0.899
creatinine 0.591 0.587 0.288 0.865

Train Test Set

trainsamples <- sample(nrow(dataFL),0.5*nrow(dataFL))
dataFLTrain <- dataFL[trainsamples,]
dataFLTest <- dataFL[-trainsamples,]


pander::pander(table(dataFLTrain$status))
0 1
2276 986
pander::pander(table(dataFLTest$status))
0 1
2286 976

Cox Modeling

ml <- BSWiMS.model(Surv(time,status)~.,data=dataFLTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower mean upper u.Accuracy r.Accuracy
age 0.0997 0.0747 0.0803 0.0863 0.735 0.607
flc.grp 0.0561 0.0166 0.0460 0.0743 0.607 0.733
creatinine 0.1408 0.0472 0.1617 0.5696 0.560 0.732
lambda 0.1774 0.0754 0.1544 0.2383 0.651 0.734
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
age 0.735 0.752 0.630 0.759 0.201360 0.8917
flc.grp 0.735 0.631 0.756 0.759 0.003550 0.2886
creatinine 0.735 0.556 0.756 0.759 0.000979 0.0749
lambda 0.735 0.619 0.757 0.759 0.001414 -0.1164
  z.IDI z.NRI Delta.AUC Frequency
age 28.41 27.02 -0.12884 1
flc.grp 3.68 7.73 -0.00313 1
creatinine 3.28 1.97 -0.00277 1
lambda 1.86 -3.10 -0.00189 1

Cox Model Performance

The evaluation of the raw Cox model with RRPlot()

timeinterval <- 5

h0 <- sum(dataFLTrain$status & dataFLTrain$time <= timeinterval)
h0 <- h0/sum((dataFLTrain$time > timeinterval) | (dataFLTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.136 5
index <- predict(ml,dataFLTrain)
rdata <- cbind(dataFLTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories

obsexp <- rrAnalysisTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 986 925 1050 1361 1.46e-26
low 259 228 293 351 3.33e-07
90% 142 120 167 178 6.17e-03
80% 585 539 634 830 3.52e-19
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Test results

index <- predict(ml,dataFLTest)
rtestdata <- cbind(dataFLTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rtestdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTest$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories on test set

obsexp <- rrAnalysisTest$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 973 913 1036 1363 1.01e-28
low 276 244 311 358 7.80e-06
90% 137 115 162 184 3.44e-04
80% 563 517 611 821 1.90e-21
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Test: Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataFLTrain,"status","time")

( 15.1813 , 16328.02 , 1.286029 , 986 , 1096.689 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.26 0.722 15.2

The RRplot() of the calibrated model

index <- predict(ml,dataFLTrain)

calrdata <- cbind(dataFLTrain$status,ppoisGzero(index,calprob$h0))


rrAnalysisCalTrain <- RRPlot(calrdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories

obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 986 925 1050 856 0.000013
low 259 228 293 221 0.012809
90% 141 119 166 112 0.007991
80% 586 540 635 521 0.005407
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Cal. Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Checking the test set

index <- predict(ml,dataFLTest)
rtestdata <- cbind(dataFLTest$status,ppoisGzero(index,calprob$h0))

rrAnalysisCalTest <- RRPlot(rtestdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories test set

obsexp <- rrAnalysisCalTest$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 973 913 1036 858 0.000113
low 276 244 311 226 0.001094
90% 137 115 162 116 0.056594
80% 563 517 611 515 0.038392
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Test Set. Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)